# Replication file for "Immigration and the Sociocultural Divide in Central and Eastern Europe: Stasis or Evolution?" by Caroline Marie Lancaster. Please contact cml93@live.unc.edu with any questions. 

# This file creates Figures 2 and 3 in the main text using the Chapel Hill Expert Survey 1999-2019 trend file.

# 28 June 2021
library(ggplot2)
library(dplyr)

ches <- read.csv('1999-2019_CHES_dataset_means(v2).csv')

#removing Latvia, which is not in EVS, as well as Malta and Cyprus, which are not post-communist
ches <- subset(ches, country != 24 & country != 37 & country != 40 & eastwest == 0 & year >=2010)

#creating GAL-TAN categorization
ches$gt <- ifelse(ches$galtan <= 4, 'GAL', NA)
ches$gt <- ifelse(ches$galtan >= 7, 'TAN', ches$gt)
ches$gt <- ifelse(is.na(ches$gt), 'Center', ches$gt)
ches$gt <- factor(ches$gt, levels = c('GAL', 'Center', 'TAN'))

#filling in missing vote shares from Parlgov
ches$vote <- ifelse(ches$year == 2010 & ches$party == 'BNS', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2010 & ches$party == 'SF', 0.7, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'Ataka', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'BMRO', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'HKS', 1.75, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'Lewica Razem', 1, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'PRO', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'Slavi Trifonov', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'LVZS', 22.5, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'VKM-AMT', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'PS', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'SPOLU', 0, ches$vote)
ches$vote <- ifelse(ches$year == 2019 & ches$party == 'Za Ludi', 0, ches$vote)

#CHES immigration salience 
chessub <- subset(ches, year == 2010 | year == 2019)
chessub$year <- as.factor(chessub$year)

#Figure 2
salplot <- ggplot(chessub, aes(x = year, y=immigrate_salience, color = gt, weight = vote)) + geom_boxplot()+ theme_bw() + ylab('Immigration Salience') + xlab('Year')+ scale_color_manual(name="Party Type", labels=c('GAL', 'Center', 'TAN'), values = c("grey70",'grey45',"grey18"))

###### CHES party position correlations
#must run function to get ability to compute confidence intervals
kendall.ci<-function (x=NULL, y=NULL, alpha=0.05, type="t", bootstrap=F, B=1000, example=F)  {
  # This will produce a 1 - alpha CI for
  # Kendall's tau.  Based on sections 8.3 and 8.4 of:
  #
  #   Nonparametric Statistical Methods, 3e
  #   Hollander, Wolfe & Chicken 
  #
  # bootstrap = F will find the asymptotic CI as in section 8.3.
  # bootstrap = T will find a bootstrap CI as in section 8.4
  # type can be "t" (two-sided), "u" (upper) or "l" (lower).
  # B is the number of bootstrap replicates.
  #
  # Inefficiently programmed by Eric Chicken, October 2012.
  
  # Example 8.1 from HW&C
  if(example)
  {
    x <- c(44.4, 45.9, 41.9, 53.3, 44.7, 44.1, 50.7, 45.2, 60.1)
    y <- c(2.6, 3.1, 2.5, 5, 3.6, 4, 5.2, 2.8, 3.8)
  }
  
  continue <- T
  
  if(is.null(x) | is.null(y)) 
  {	
    cat("\n")
    cat("You must supply an x sample and a y sample!", "\n")
    cat("\n")
    continue <- F
  }
  
  if(continue & (length(x) != length(y))) 
  {	
    cat("\n")
    cat("Samples must be of the same length!", "\n")
    cat("\n")
    continue <- F
  }
  
  if(continue & (length(x) <= 1)) 
  {	
    cat("\n")
    cat("Sample size n must be at least two!", "\n")
    cat("\n")
    continue <- F
  }
  
  if(continue & (type!="t" & type!="l" & type!="u")) 
  {	
    cat("\n")
    cat("Argument \"type\" must be one of \"s\" (symmetric), \"l\" (lower) or \"u\" (upper)!", "\n")
    cat("\n")
    continue <- F
  }
  
  # Q* from (8.17)
  Q <- function(i, j)
  {
    Q.ij <- 0
    ij <- (j[2] - i[2]) * (j[1] - i[1])
    if(ij > 0) Q.ij <- 1
    if(ij < 0) Q.ij <- -1
    Q.ij
  }
  
  # C.i from (8.37)
  C.i <- function(x, y, i)
  {
    C.i <- 0
    for(k in 1:length(x))
      if(k != i) 
        C.i <- C.i + Q(c(x[i], y[i]), c(x[k], y[k]))
    C.i		
  }
  
  if(continue & !bootstrap)
  {
    c.i <- numeric(0)
    n <- length(x)
    for(i in 1:n) c.i <- c(c.i, C.i(x, y, i))
    
    # Get the estimate of tau from the existing function cor.test
    # (8.34)
    # Temporarily disable warnings about p-values and ties
    options("warn" = -1)
    tau.hat <- cor.test(x, y, method="k")$estimate
    options("warn" = 0)
    
    # (8.38)
    sigma.hat.2 <- 2 * (n - 2) * var(c.i) / n / (n-1)
    sigma.hat.2 <- sigma.hat.2 + 1 - (tau.hat)^2
    sigma.hat.2 <- sigma.hat.2 * 2 / n / (n - 1)
    
    if(type=="t") z <- qnorm(alpha / 2, lower.tail = F)
    if(type!="t") z <- qnorm(alpha, lower.tail = F)
    # (8.39), (8.43), (8.45)
    tau.L <- tau.hat - z * sqrt(sigma.hat.2)
    tau.U <- tau.hat + z * sqrt(sigma.hat.2)
    if(type=="l") tau.U <- 1
    if(type=="u") tau.L <- -1
  }
  
  if(continue & bootstrap)
  {
    tau <- numeric(0)
    for(b in 1:B)
    {
      b.sample <- sample(1:length(x), length(x), replace=T)
      # Temporarily disable warnings about p-values and ties
      options("warn" = -1)
      tau.sample <- cor.test(x[b.sample], y[b.sample], method="k")
      options("warn" = 0)
      tau.sample <- tau.sample$estimate
      tau <- c(tau, tau.sample)
    }
    tau.hat <- sort(tau)
    hist(tau.hat)
    if(type=="t") k <- floor((B + 1) * alpha / 2)
    if(type!="t") k <- floor((B + 1) * alpha)
    tau.L <- tau.hat[k]
    tau.U <- tau.hat[(B + 1 - k)]
    if(type=="l") tau.U <- 1
    if(type=="u") tau.L <- -1
    
  }
  
  tau.L <- round(tau.L, 3)
  tau.U <- round(tau.U, 3)
  
  if(type=="t") print.type <- " two-sided CI for tau:"
  if(type=="l") print.type <- " lower bound for tau:"
  if(type=="u") print.type <- " upper bound for tau:"
  
  cat("\n")
  cat(paste("1 - alpha = ", 1 - alpha, print.type, sep=""))
  cat("\n")
  cat(paste(tau.L, ", ", tau.U, sep=""), "\n")
  cat("\n")
  invisible(c(tau.L,tau.U))
}

ches10 <- subset(ches, year == 2010)

cor2010 <- cor.test(as.numeric(ches10$galtan), ches10$immigrate_policy, method = 'kendall')
se2010 <- kendall.ci(ches10$galtan, ches10$immigrate_policy, bootstrap = T)
cor2010 <- as.data.frame(cbind(cor2010$estimate, se2010[1], se2010[2]))
cor2010$year <- 2010
cor2010$issue <- 'immig'

ches14 <- subset(ches, year == 2014)

cor2014 <- cor.test(as.numeric(ches14$galtan), ches14$immigrate_policy, method = 'kendall')
se2014 <- kendall.ci(ches14$galtan, ches14$immigrate_policy, bootstrap = T)
cor2014 <- as.data.frame(cbind(cor2014$estimate, se2014[1], se2014[2]))
cor2014$year <- 2014
cor2014$issue <- 'immig'

ches19 <- subset(ches, year == 2019)

cor2019 <- cor.test(as.numeric(ches19$galtan), ches19$immigrate_policy, method = 'kendall')
se2019 <- kendall.ci(ches19$galtan, ches19$immigrate_policy, bootstrap = T)
cor2019 <- as.data.frame(cbind(cor2019$estimate, se2019[1], se2019[2]))
cor2019$year <- 2019
cor2019$issue <- 'immig'

cors <- rbind(cor2010, cor2014,cor2019)
colnames(cors) <- c('mean', 'cilow', 'cihi','year', 'issue')

cor2010a <- cor.test(as.numeric(ches10$galtan), ches10$sociallifestyle, method = 'kendall')
se2010a <- kendall.ci(ches10$galtan, ches10$sociallifestyle, bootstrap = T)
cor2010a <- as.data.frame(cbind(cor2010a$estimate, se2010a[1], se2010a[2]))
cor2010a$year <- 2010
cor2010a$issue <- 'life'

cor2014a <- cor.test(as.numeric(ches14$galtan), ches14$sociallifestyle, method = 'kendall')
se2014a <- kendall.ci(ches14$galtan, ches14$sociallifestyle, bootstrap = T)
cor2014a <- as.data.frame(cbind(cor2014a$estimate, se2014a[1], se2014a[2]))
cor2014a$year <- 2014
cor2014a$issue <- 'life'

cor2019a <- cor.test(as.numeric(ches19$galtan), ches19$sociallifestyle, method = 'kendall')
se2019a <- kendall.ci(ches19$galtan, ches19$sociallifestyle, bootstrap = T)
cor2019a <- as.data.frame(cbind(cor2019a$estimate, se2019a[1], se2019a[2]))
cor2019a$year <- 2019
cor2019a$issue <- 'life'

corsa <- rbind(cor2010a, cor2014a,cor2019a)
colnames(corsa) <- c('mean', 'cilow', 'cihi','year', 'issue')

cor2010b <- cor.test(as.numeric(ches10$immigrate_policy), ches10$sociallifestyle, method = 'kendall')
se2010b <- kendall.ci(ches10$immigrate_policy, ches10$sociallifestyle, bootstrap = T)
cor2010b <- as.data.frame(cbind(cor2010b$estimate, se2010b[1], se2010b[2]))
cor2010b$year <- 2010
cor2010b$issue <- 'both'

cor2014b <- cor.test(as.numeric(ches14$immigrate_policy), ches14$sociallifestyle, method = 'kendall')
se2014b <- kendall.ci(ches14$immigrate_policy, ches14$sociallifestyle, bootstrap = T)
cor2014b <- as.data.frame(cbind(cor2014b$estimate, se2014b[1], se2014b[2]))
cor2014b$year <- 2014
cor2014b$issue <- 'both'

cor2019b <- cor.test(as.numeric(ches19$immigrate_policy), ches19$sociallifestyle, method = 'kendall')
se2019b <- kendall.ci(ches19$immigrate_policy, ches19$sociallifestyle, bootstrap = T)
cor2019b <- as.data.frame(cbind(cor2019b$estimate, se2019b[1], se2019b[2]))
cor2019b$year <- 2019
cor2019b$issue <- 'both'

corsb <- rbind(cor2010b,cor2014b,cor2019b)
colnames(corsb) <- c('mean', 'cilow', 'cihi','year', 'issue')

corsfull <- rbind(cors, corsa, corsb)
corsfull$issue <- factor(corsfull$issue, levels = c('immig', 'life', 'both'))

#Figure 3
corplot <- ggplot(corsfull, aes(x = as.factor(year), y = mean, ymin = cilow, ymax = cihi, color = issue)) + geom_point(position = position_dodge(0.5)) + geom_errorbar(width = 0,position = position_dodge(0.5)) + theme_bw() + scale_color_manual(name="Position Correlations", labels=c('corr(immig., GAL-TAN)', 'corr(lifestyle, GAL-TAN)', 'corr(immig., lifestyle)'), values = c("grey18",'grey45',"grey70")) + labs(x = "Year", y = "Kendall's Tau") +theme_bw() + scale_x_discrete(labels=c('2010' , '2014', '2019')) + ylim(.40, 1.0)

####### Table 11, Appendix B
c2010 <- ches10 %>% group_by(country) %>% dplyr::summarize(mean = weighted.mean(immigrate_salience, vote))

c2019 <- ches19 %>% group_by(country) %>% dplyr::summarize(mean = weighted.mean(immigrate_salience, vote, na.rm = T))

######### END OF SCRIPT